import multiprocessing
import socket
import time
import re
import signal
import jieba
import requests
from bs4 import BeautifulSoup as bs4
from openpyxl import Workbook
from wordcloud.wordcloud import WordCloud
import matplotlib.pyplot as plt
import json

# Build the TCP socket and connect it to the Douyu danmu API server.
# NOTE: these statements run at import time, so merely importing this module
# resolves the host name and opens the connection.
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host = socket.gethostbyname("danmuproxy.douyu.com")
port = 8601
client.connect((host, port))

# Compiled byte patterns used to pull danmu fields out of raw server data:
#   danmu_re    - text between "txt@=" and "/cid@"      (the danmu content)
#   username_re - text between "nn@=" and "/txt@"       (the sender nickname)
#   level_re    - text between "/level@=" and "/sahf@"  (the sender level)
# All three match lazily (".+?") and operate on bytes, not str.
danmu_re = re.compile(b'txt@=(.+?)/cid@')
username_re = re.compile(b'nn@=(.+?)/txt@')
level_re = re.compile(b'/level@=(.+?)/sahf@')


# Look up the room name that belongs to a room id.
def get_room_name(roomid):
    """Return the title shown on the Douyu page of room ``roomid``.

    The page ``http://www.douyu.com/<roomid>`` is fetched and the ``title``
    attribute of the ``h2.Title-anchorNameH2`` element inside
    ``div.layout-Player-title`` is returned.

    Args:
        roomid: Room identifier; converted with ``str()`` to build the URL.

    Returns:
        The title as a string, or ``''`` when the page cannot be fetched or
        the expected elements/attribute are not present.
    """
    try:
        # A timeout keeps a stalled HTTP request from blocking forever.
        res = requests.get('http://www.douyu.com/' + str(roomid), timeout=10)
    except requests.RequestException:
        # The name is only used for display, so a failed lookup is not fatal.
        return ''

    soup = bs4(res.text, 'lxml')
    container = soup.find(name='div', attrs={"class": 'layout-Player-title'})
    if container is None:
        # find() returns None when the div is missing; calling .find() on it
        # would raise AttributeError.
        return ''

    heading = container.find(name='h2', attrs={"class": 'Title-anchorNameH2'})
    if heading is None:
        return ''

    try:
        return ''.join(heading.get_attribute_list('title'))
    except TypeError:
        # A missing "title" attribute yields a non-string entry that
        # str.join() rejects.
        return ''


def send_req_msg(msgstr):
    """Frame ``msgstr`` and send it over the module-level ``client`` socket.

    The frame is a 12-byte header followed by the UTF-8 encoded message.
    The header consists of three little-endian 4-byte integers: the length
    field (payload length + 8) written twice, then the message code 689.

    Args:
        msgstr: Message text. It is encoded as-is, so any terminator the
            server expects must already be part of the string.

    Raises:
        OSError: Propagated from ``socket.sendall`` when the write fails.
    """
    payload = msgstr.encode('utf-8')
    data_length = len(payload) + 8
    code = 689
    # Build the protocol header.
    header = (
        data_length.to_bytes(4, 'little')
        + data_length.to_bytes(4, 'little')
        + code.to_bytes(4, 'little')
    )
    # sendall() keeps writing until every byte is out; a single send() may
    # write only part of its buffer, which previously went unchecked for the
    # header.
    client.sendall(header + payload)


# Save the collected danmu rows to an Excel workbook.
def save_to_excel(room_name, barrage_list):
    """Write the rows of ``barrage_list`` to ``弹幕.xlsx``.

    The file name is relative, so the workbook lands in the current working
    directory.

    Args:
        room_name: Accepted for interface compatibility only; the output
            file name is fixed and does not depend on it.
        barrage_list: Iterable of rows. The first three items of each row
            are written as one worksheet row.
    """
    wb = Workbook()
    ws = wb.active
    for index, row in enumerate(barrage_list):
        try:
            ws.append([row[0], row[1], row[2]])
        except Exception:
            # Best effort: a row that is too short or holds a value the
            # worksheet refuses is reported and skipped, not fatal.
            print('第%d条弹幕信息保存失败' % index)
    wb.save(r'弹幕.xlsx')


# Collect danmu for a room, then build a word cloud and an Excel export.
def DM_start(roomid, barrage_num):
    """Join room ``roomid``, collect danmu, and export the results.

    Steps:
      1. Send the login and join-group requests over ``client``.
      2. Read from the socket until at least ``barrage_num`` danmu have been
         collected or the server closes the connection.
      3. Build a word cloud from the danmu text, save it as
         ``barrages_cloud.jpg`` and show it.
      4. Save all rows with ``save_to_excel`` and send the logout request.

    Args:
        roomid: Room identifier, interpolated into the request strings.
        barrage_num: Number of danmu to collect before stopping.
    """
    # Login/authorisation request.
    msg = 'type@=loginreq/roomid@={}/\0'.format(roomid)
    send_req_msg(msg)
    # Request to start receiving danmu messages for the room.
    msg_more = 'type@=joingroup/rid@={}/gid@=-9999/\0'.format(roomid)
    send_req_msg(msg_more)
    room_name = get_room_name(roomid)
    print('已连接至\"{}\"的直播间'.format(room_name))
    # The first row is the column header used by the Excel export.
    barrage_list = [['等级', '昵称', '弹幕']]
    print("弹幕正在获取中...")

    connection_closed = False
    collecting = True
    while collecting:
        # Raw data returned by the server.
        data = client.recv(1024)
        if not data:
            # recv() returns b'' once the peer has closed the connection;
            # retrying would spin forever, so stop with what we have.
            print('服务器已断开连接，停止获取弹幕')
            connection_closed = True
            break

        levels = level_re.findall(data)
        usernames = username_re.findall(data)
        contents = danmu_re.findall(data)
        # One chunk can carry several danmu: pair the i-th matches together
        # instead of reusing the first match for every entry.
        for raw_level, raw_name, raw_text in zip(levels, usernames, contents):
            try:
                row = [
                    raw_level.decode('utf8'),
                    raw_name.decode('utf8'),
                    raw_text.decode('utf8'),
                ]
            except UnicodeDecodeError:
                # Skip entries whose bytes are not valid UTF-8.
                continue
            barrage_list.append(row)
            barrages = len(barrage_list)
            # len() includes the header row, hence ">" and the "- 1" below.
            if barrages > barrage_num:
                print('已成功获得%d条弹幕' % (barrages - 1))
                collecting = False
                break

    # Build the word cloud. The header row is skipped so that the column
    # title is not counted as a danmu word.
    all_barrages = ''.join(str(row[2]) for row in barrage_list[1:])
    all_barrages = filterword(all_barrages)
    words = ' '.join(jieba.cut(all_barrages))
    # The font path is configured here.
    Words_Cloud = WordCloud(background_color="black", width=900, height=600, max_words=100, font_path="simkai.ttf")
    process_word = Words_Cloud.process_text(words)
    if process_word:
        Words_Cloud.generate_from_frequencies(process_word)
        print('成功生成词云...')
        image = Words_Cloud.to_image()
        plt.imshow(image)  # display the image
        plt.axis('off')  # hide the axes
        plt.savefig('barrages_cloud.jpg')
        plt.show()
    else:
        # Nothing usable was collected; there is no word to draw.
        print('没有可用于生成词云的弹幕')

    print('数据开始导入Excel中')
    save_to_excel(room_name, barrage_list)
    # NOTE(review): the message says "Desktop", but save_to_excel writes
    # '弹幕.xlsx' relative to the current working directory.
    print('导入成功，保存在桌面')
    print(words)
    if not connection_closed:
        # Sending on a connection the server already closed would fail.
        logout()


# Cleaning helper: remove punctuation and other unwanted symbols.
def filterword(filterdata):
    """Return ``filterdata`` without any character listed in ``symbol``.

    After the symbols are removed, leading and trailing spaces are stripped;
    spaces inside the text are kept.

    Args:
        filterdata: Text to clean.

    Returns:
        The cleaned string.
    """
    symbol = '，。“”~！@#￥%……&*（）——+=【】{}、|；：‘’《》？!#$^&()[]{};:",.<>/?\\-\n'
    # A single translate() pass deletes every listed character; this replaces
    # one replace() + strip() pass per symbol and yields the same result.
    cleaned = filterdata.translate(str.maketrans('', '', symbol))
    return cleaned.strip(' ')


def keeplive():
    """Send a heartbeat request every 45 seconds, forever.

    This function never returns, so it has to run in its own process or
    thread and be stopped from outside.
    """
    # Terminated with '\0' like the login and join-group requests built in
    # DM_start.
    msg = 'type@=mrkl/\0'
    while True:
        send_req_msg(msg)
        print('发送心跳包')
        time.sleep(45)


def logout():
    """Send the logout request to the Douyu server.

    Only the request is sent: the module-level socket is left open and no
    process or thread is stopped here.
    """
    # Terminated with '\0' like the login and join-group requests built in
    # DM_start.
    msg = 'type@=logout/\0'
    send_req_msg(msg)
    print('已经退出服务器')


def signal_handler(signal, frame):
    """Handle Ctrl+C (``signal.SIGINT``): stop the heartbeat, log out, exit.

    Args:
        signal: Signal number passed by the interpreter. Inside this
            function the name shadows the ``signal`` module.
        frame: Current stack frame; unused.

    Raises:
        SystemExit: Always, so that the interrupted code does not resume
            (for example a blocking ``recv`` after the logout was sent).
    """
    if multiprocessing.current_process().name != 'MainProcess':
        # A heartbeat child forked after this handler was installed inherits
        # it. The child owns no process handle and must not log out a second
        # time; it only needs to stop.
        raise SystemExit(0)

    # `p` is created by the __main__ block; Ctrl+C can arrive before that,
    # so look it up instead of assuming it exists.
    heartbeat = globals().get('p')
    if heartbeat is not None:
        heartbeat.terminate()
    logout()
    print('Bye')
    raise SystemExit(0)


# Save the live-room listing to an Excel workbook.
def save_to_excel2(zhibo):
    """Write the room listing ``zhibo`` to ``直播间信息.xlsx``.

    A header row (anchor, room id, room name, heat) is written first, then
    one worksheet row per entry. The file name is relative, so the workbook
    lands in the current working directory.

    Args:
        zhibo: Iterable of rows; each row is appended to the sheet as-is.
    """
    wb = Workbook()
    ws = wb.active
    ws.append(['主播', '房间号', '房间名', '热度'])
    for index, row in enumerate(zhibo, start=1):
        try:
            ws.append(row)
        except Exception:
            # Report the 1-based position of the failing row. The previous
            # format string ('%条') was itself invalid and raised ValueError
            # inside this handler.
            print('第%d条信息保存失败!' % index)
    wb.save('直播间信息.xlsx')
    print('写入成功！')


# Scrape the live-room listing (League of Legends category per the original
# comment).
def catch():
    """Fetch pages 1 and 2 of the room directory, export and print the top 10.

    Every room in the ``data -> rl`` list of each JSON response becomes a row
    ``[anchor name, room id, room name, heat]``. Rows are ordered by heat,
    highest first, written to Excel via ``save_to_excel2`` and the first ten
    are printed as a table.

    Returns:
        The list of rows ordered by heat, highest first.

    Raises:
        requests.RequestException: When a page cannot be fetched in time.
        KeyError, ValueError: When a response does not have the expected
            shape or is not valid JSON.
    """
    urls = ['https://www.douyu.com/gapi/rkc/directory/mixList/2_1/{}'.format(page) for page in range(1, 3)]
    zhibo = []
    for url in urls:
        # A timeout keeps a stalled request from blocking the script forever.
        res = requests.get(url, timeout=10)
        payload = json.loads(res.text)  # decode the JSON text into Python objects
        # The room dictionaries live under data -> rl.
        for room in payload['data']['rl']:
            anchor = room['nn']  # anchor name
            room_number = room['rid']  # room id
            heat = room['ol']  # heat
            room_name = room['rn']  # room name
            zhibo.append([anchor, room_number, room_name, int(heat)])

    # Ascending sort followed by reverse() puts the hottest rooms first and
    # keeps the original ordering of rooms with equal heat.
    zhibo.sort(key=lambda row: row[3])
    zhibo.reverse()
    save_to_excel2(zhibo)
    # Print the ten hottest rooms; chr(12288) is the fill character used for
    # column alignment.
    tplt = '{0:{4}<10}\t{1:{4}<7}\t{2:{4}^15}\t{3:{4}<8}'
    print(tplt.format('主播', '房间号', '房间名', '热度', chr(12288)))
    for row in zhibo[:10]:
        print(tplt.format(row[0], str(row[1]), row[2], str(row[3]), chr(12288)))
    return zhibo


if __name__ == '__main__':
    # Show the current room listing so the user can pick a room id.
    zhibo = catch()
    room_id = input('请输入房间ID：')
    barrage_num = int(input('请输入需要的弹幕数量：'))

    # Start the heartbeat BEFORE collecting: DM_start blocks until it is done
    # and then logs out, so a heartbeat started afterwards would never run
    # during collection and would keep the program alive forever.
    # NOTE(review): the child only shares this connection when processes are
    # forked; with the "spawn" start method the module is re-imported in the
    # child and opens its own socket - confirm on the target platform.
    p = multiprocessing.Process(target=keeplive)
    p.daemon = True  # never outlive the main process
    p.start()

    # Install the Ctrl+C handler only once `p` exists, because the handler
    # calls p.terminate().
    signal.signal(signal.SIGINT, signal_handler)
    try:
        DM_start(room_id, barrage_num)
    finally:
        # Collection finished (or failed): the heartbeat is no longer needed.
        p.terminate()
        p.join()
